* ---- Session setup ----
* Start from an empty session, turn off output paging, and close any log that
* is still open (capture suppresses the error raised when no log is open).
clear all
set more off
capture log close

* Run the project's directory set-up do-file.
* NOTE(review): presumably this defines the global dir_data used for every file path below -- confirm.
do "H:\Lavecchia_7086\to-transfer-jan-2022\RESTAT_Replication_Programs\0_Set_Directories.do"
*do "H:\Lavecchia_7086\to-transfer-jan-2022\1_Alisa_Do\0_Set_Directories.do"

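****************************************************************************
* This do-file uses subsamples
*	- capital gains, income, dividends and RRSP-related variables
* 
* It appends the two demographics files, merges in the capital gains,
* predicted-gains and marginal-tax-rate files, builds the group and outcome
* variables, and saves data_sample_IV_diffndiff.dta.
* 
* Regressions
* 
****************************************************************************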

* ---- Build the base panel ----
* Load the first demographics file and stack the second one underneath it.
clear
use "$dir_data\data_8299_with_demographics.dta"
/* Added by Adam on October 13, 2020 */
append using "$dir_data\data_0016_with_demographics.dta"

* merge with capital gains for selected years (8284, 8589)
* 1:1 on (lin__i, year); rows that exist only in the using file are discarded,
* so unmatched master rows keep missing values for the merged-in variables.
sort lin__i year
merge 1:1 lin__i year using  "$dir_data\capgains_select.dta"
drop if _merge==2
drop _merge

* Attach data_8593_predicted.dta to every row of each lin__i.
* m:1 replaces the deprecated old-style "merge varlist using" call: it states the
* many-to-one match explicitly (consistent with the 1:1 merge above) and stops with
* an error if the using file is not unique on lin__i instead of silently pairing rows.
* NOTE(review): assumes the using file holds one row per lin__i -- confirm.
sort lin__i 
merge m:1 lin__i using "$dir_data\data_8593_predicted.dta"
keep if _merge!=2
drop _merge

*****************
* ---- Sample restrictions ----
*keep if age82>=20 & age82<=60
* Observations with missing age90 are dropped too: missing compares as larger than 90.
keep if age90 >=20 & age90<=90 /* Changed on February 27, 2020 by Adam */
* Remove observations marked by either flag, then discard the flag variables.
drop if flag==1
drop if flag2==1
drop flag flag2  
* flag_losses: 'weird' observations where the 1982-2016 capital gains are less than the 1985-1993 capital gains (people with big capital losses)
* Stata treats missing as larger than any number, so without the guard a missing
* clkgxi8593 would satisfy the "<" test and raise the flag; only an observed
* clkgxi8593 may do so. The flag stays a 0/1 variable.
gen flag_losses = (clkgxi8216 < clkgxi8593 & !missing(clkgxi8593))
*****************

* ---- Groups based on predicted lifetime capital gains (pred_lifetime_clkgxi_8216_v2) ----
* Drop each variable separately: a single "drop cg cg_actual" fails as a whole,
* and drops nothing, when either variable is absent.
capture drop cg
capture drop cg_actual
gen over150 = 150000   /* this represents $100K in 1993, converted to 2016 dollars */
* Missing compares as larger than any number, so exclude it explicitly from the indicator.
gen over150_8593 = (clkgxi8593 >= 150000 & !missing(clkgxi8593))
gen cg = 1 if (pred_lifetime_clkgxi_8216_v2 > 225000 & pred_lifetime_clkgxi_8216_v2 != .)            /* VERY HIGH CONTROL GROUP */
*label define cg 1 "very high control"
* Upper bound is 225000 (previously 200000): with the old bound, values in
* (200000, 225000] fell between groups 1 and 2 and were left with cg missing.
* The bands now match those used for cg_actual.
replace cg = 2 if (pred_lifetime_clkgxi_8216_v2 > 175000 & pred_lifetime_clkgxi_8216_v2 <= 225000)   /* HIGH CONTROL GROUP */
*label define cg 2 "high control"
replace cg = 3 if (pred_lifetime_clkgxi_8216_v2 > 125000 & pred_lifetime_clkgxi_8216_v2 <= 175000)   /* MAIN TREATMENT GROUP */
*label define cg 3 "likely treated"
replace cg = 4 if (pred_lifetime_clkgxi_8216_v2 > 75000 & pred_lifetime_clkgxi_8216_v2 <= 125000)   /* 'BELOW' CONTROL GROUP */
*label define cg 4 "low control"
replace cg = 5 if (pred_lifetime_clkgxi_8216_v2 > 25000 & pred_lifetime_clkgxi_8216_v2 <= 75000) 
*label define cg 5 "very low control"
replace cg = 6 if (pred_lifetime_clkgxi_8216_v2 > 1000 & pred_lifetime_clkgxi_8216_v2 <= 25000)
*label define cg 6 "very, very low control"
* Missing predictions fail the "<=" test, so cg stays missing for them.
replace cg = 7 if (pred_lifetime_clkgxi_8216_v2 <= 1000)
*label define cg 7 "low savers"



* ---- Groups based on actual 1982-2016 capital gains (clkgxi8216) ----
* Seven mutually exclusive bands, assigned from the lowest band upwards; because the
* bands do not overlap, the order of assignment does not affect the result.
* Observations with clkgxi8216 equal to . match no band and keep cg_actual missing.
gen cg_actual = 7 if clkgxi8216 <= 1000
replace cg_actual = 6 if clkgxi8216 > 1000 & clkgxi8216 <= 25000
replace cg_actual = 5 if clkgxi8216 > 25000 & clkgxi8216 <= 75000
replace cg_actual = 4 if clkgxi8216 > 75000 & clkgxi8216 <= 125000     /* 'BELOW' CONTROL GROUP */
replace cg_actual = 3 if clkgxi8216 > 125000 & clkgxi8216 <= 175000    /* MAIN TREATMENT GROUP */
replace cg_actual = 2 if clkgxi8216 > 175000 & clkgxi8216 <= 225000    /* 'ABOVE' CONTROL GROUP */
replace cg_actual = 1 if clkgxi8216 > 225000 & clkgxi8216 != .
* Earlier grouping based on predicted8299, kept for reference (disabled):
*replace predicted8299=predicted8299/1.6378
*gen cg=10    if (predicted8299<=5000 )==1
*replace cg= 9 if (predicted8299>5000 & predicted8299<=10000)==1
*replace cg= 8 if (predicted8299>10000 & predicted8299<=25000)==1
*replace cg= 7 if (predicted8299>25000 & predicted8299<=50000)==1
*replace cg= 6 if (predicted8299>50000  & predicted8299<=100000)==1
*replace cg= 3 if (predicted8299>100000  & predicted8299<=200000)==1
*replace cg= 1 if (predicted8299>200000 & predicted8299!=. )==1

* GROUPS BASED ON 1985-1993 REPORTED CAPITAL GAINS
*  Change made on August 5, 2020 by Adam
* Previously groups defined based on 1982-1993 reported realizations
* Six bands from highest (1) to lowest (6); lower bounds inclusive, upper bounds exclusive.
* Group 1 excludes missing values explicitly: Stata treats missing as larger than any
* number, so an unguarded ">= 200000" would put missing clkgxi8593 in the top group.
* Observations with missing clkgxi8593 keep cg_8593 missing.
gen cg_8593 = 1 if clkgxi8593 >= 200000 & !missing(clkgxi8593)
replace cg_8593 = 2 if (clkgxi8593 >= 150000 & clkgxi8593 < 200000)
replace cg_8593 = 3 if (clkgxi8593 >= 100000 & clkgxi8593 < 150000)
replace cg_8593 = 4 if (clkgxi8593 >= 50000 & clkgxi8593 < 100000)
replace cg_8593 = 5 if (clkgxi8593 >= 1000 & clkgxi8593 < 50000)
replace cg_8593 = 6 if (clkgxi8593 < 1000)
* Alternative control: group-2 observations whose 1985-1989 gains also fall in the 150000-200000 band.
gen cg_8593_alt_control = 1 if cg_8593 == 2 & (clkgxi8589 >= 150000 & clkgxi8589 < 200000) /* Change made on January 10, 2022 by Adam */


* GROUPS BASED ON 1985-1989 REPORTED CAPITAL GAINS
*  Change made on January 10, 2022 by Adam
* Six bands from highest (1) to lowest (6); lower bounds inclusive, upper bounds exclusive.
* Group 1 excludes missing values explicitly: Stata treats missing as larger than any
* number, so an unguarded ">= 200000" would put missing clkgxi8589 in the top group.
* Observations with missing clkgxi8589 keep cg_8589 missing.
gen cg_8589 = 1 if clkgxi8589 >= 200000 & !missing(clkgxi8589)
replace cg_8589 = 2 if (clkgxi8589 >= 150000 & clkgxi8589 < 200000)
replace cg_8589 = 3 if (clkgxi8589 >= 100000 & clkgxi8589 < 150000)
replace cg_8589 = 4 if (clkgxi8589 >= 50000 & clkgxi8589 < 100000)
replace cg_8589 = 5 if (clkgxi8589 >= 1000 & clkgxi8589 < 50000)
replace cg_8589 = 6 if (clkgxi8589 < 1000)


* GROUPS BASED ON 1982-2016 REPORTED CAPITAL GAINS
*  Change made on January 10, 2022 by Adam
* Six bands from highest (1) to lowest (6); lower bounds inclusive, upper bounds exclusive.
* Group 1 excludes missing values explicitly: Stata treats missing as larger than any
* number, so an unguarded ">= 200000" would put missing clkgxi8216 in the top group.
* Observations with missing clkgxi8216 keep cg_8216 missing.
gen cg_8216 = 1 if clkgxi8216 >= 200000 & !missing(clkgxi8216)
replace cg_8216 = 2 if (clkgxi8216 >= 150000 & clkgxi8216 < 200000)
replace cg_8216 = 3 if (clkgxi8216 >= 100000 & clkgxi8216 < 150000)
replace cg_8216 = 4 if (clkgxi8216 >= 50000 & clkgxi8216 < 100000)
replace cg_8216 = 5 if (clkgxi8216 >= 1000 & clkgxi8216 < 50000)
replace cg_8216 = 6 if (clkgxi8216 < 1000)


* GROUPS BASED ON 1982-1999 PREDICTED CAPITAL GAINS
*  Change made on January 10, 2022 by Adam
* Six bands from highest (1) to lowest (6); lower bounds inclusive, upper bounds exclusive.
* Group 1 excludes missing values explicitly: Stata treats missing as larger than any
* number, so an unguarded ">= 200000" would put observations without a prediction
* (missing predicted8299) in the top group. Those observations keep cg_8299 missing.
gen cg_8299 = 1 if predicted8299 >= 200000 & !missing(predicted8299)
replace cg_8299 = 2 if (predicted8299 >= 150000 & predicted8299 < 200000)
replace cg_8299 = 3 if (predicted8299 >= 100000 & predicted8299 < 150000)
replace cg_8299 = 4 if (predicted8299 >= 50000 & predicted8299 < 100000)
replace cg_8299 = 5 if (predicted8299 >= 1000 & predicted8299 < 50000)
replace cg_8299 = 6 if (predicted8299 < 1000)


* GROUPS BASED ON PREDICTED LIFETIME CAPITAL GAINS USING 1982-1984 x 10 REPORTED GAINS AS A PREDICTOR
*  Change made on January 10, 2022 by Adam
* Six bands from highest (1) to lowest (6); lower bounds inclusive, upper bounds exclusive.
* Group 1 excludes missing values explicitly: Stata treats missing as larger than any
* number, so an unguarded ">= 200000" would put missing clkgxi8284_times10 in the top group.
* Observations with missing clkgxi8284_times10 keep cg_8284_times10 missing.
gen cg_8284_times10 = 1 if clkgxi8284_times10 >= 200000 & !missing(clkgxi8284_times10)
replace cg_8284_times10 = 2 if (clkgxi8284_times10 >= 150000 & clkgxi8284_times10 < 200000)
replace cg_8284_times10 = 3 if (clkgxi8284_times10 >= 100000 & clkgxi8284_times10 < 150000)
replace cg_8284_times10 = 4 if (clkgxi8284_times10 >= 50000 & clkgxi8284_times10 < 100000)
replace cg_8284_times10 = 5 if (clkgxi8284_times10 >= 1000 & clkgxi8284_times10 < 50000)
replace cg_8284_times10 = 6 if (clkgxi8284_times10 < 1000)


* ---- Merge the "mtr_noexemption" files ----
* The update option fills in master values that are missing with values from the
* using file; non-missing master values are left untouched.
* 1:1 replaces the deprecated old-style "merge varlist using" call: it states the
* one-to-one match on (lin__i, year) explicitly and stops with an error if either
* side has duplicate keys instead of silently pairing rows.
* NOTE(review): assumes each using file is unique on (lin__i, year) -- confirm.
sort lin__i year
merge 1:1 lin__i year using "$dir_data\data_8299_with_mtr_noexemption.dta", update
* Discard rows that exist only in the using file.
drop if _merge==2
drop _merge

/* Added by Adam on October 13, 2020 */
sort lin__i year
merge 1:1 lin__i year using "$dir_data\data_0016_with_mtr_noexemption.dta", update
drop if _merge==2
drop _merge

***************** CREATE NEW VARIABLES ***********
* Indicators: clkgxi is positive; clkgxi scaled by cpi_to2016 exceeds 2338.8.
* NOTE(review): cpi_to2016 is presumably a price-index factor converting to 2016 dollars,
* and 2338.8 presumably a 1,000 threshold expressed in those dollars -- confirm.
* NOTE(review): Stata treats missing as larger than any number, so both indicators
* equal 1 when clkgxi (or the product) is missing -- confirm this is intended.
gen pos_clkgxi = clkgxi > 0
gen pos_clkgxi1k = clkgxi*cpi_to2016 > 2338.8
*gen pos_xdiv=(xdiv_i>0)
*gen pos_rrspci=(rrspci>0)
*gen pos_invi=(invi_i>0)
*
* Scaled gains, filled in only where the matching condition above holds; missing elsewhere.
gen onlypos_clkgxi = clkgxi*cpi_to2016 if clkgxi > 0
gen onlypos1k_clkgxi = clkgxi*cpi_to2016 if clkgxi*cpi_to2016 > 2338.8
*gen onlypos_xdiv=xdiv_i*cpi_to2016  if xdiv_i>0
*gen onlypos_rrspci=rrspci*cpi_to2016  if rrspci>0
*gen onlypos_invi=invi_i*cpi_to2016  if invi_i>0
*
*gen age3=age^3

* Top-code tnkidi at 3.
* NOTE(review): tnkidi is presumably the number of children -- confirm.
* Missing compares as larger than any number in Stata, so without the guard a
* missing tnkidi would be recoded to 3; missing values are now left missing.
replace tnkidi=3 if tnkidi>=3 & !missing(tnkidi)

* Fifteen 0/1 indicators for 10,000-wide bands of tirc_i: band i covers
* (i-1)*10000 <= tirc_i < i*10000. Values below 0, at or above 150,000, or missing
* get 0 in every indicator.
forvalues i = 1/15 {
    gen tircd_`i' = (tirc_i >= (`i'-1)*10000 & tirc_i < `i'*10000)
}


* Shrink storage types where possible, then write the dataset (overwriting any existing file).
compress
save "$dir_data\data_sample_IV_diffndiff.dta", replace

********************************************************************************************************************

